Movie Review Web Application

플라스크 웹 기반으로 사용자가 영화 리뷰를 입력하고, 입력한 리뷰에 대한 예측을 출력하는 웹 애플리케이션
사용자가 예측한 레이블에 대하여 피드백을 보낼 수 있으며, 사용자의 피드백에 따라 분류 모델을 업데이트 한다.
사용자가 입력한 영화 리뷰 텍스트와 예측한 클래스 레이블을 SQLite 데이터베이스에 저장한다.
Directory structure
movieclassifier/
    app.py
    pkl_objects/
        classifier.pkl
        stopwords.pkl
    reviews.sqlite
    static/
        style.css
    templates/
        _formhelpers.html
        results.html
        reviewform.html
        thanks.html
    vectorizer.py
app.py
from flask import Flask, render_template, request
from wtforms import Form, TextAreaField, validators
import pickle
import sqlite3
import os
import numpy as np
from vectorizer import vect
# Flask application object; the route decorators below register against it.
app = Flask(__name__)

# Resolve data files relative to this module so the app does not depend on
# the process's current working directory.
cur_dir = os.path.dirname(__file__)

# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# classifier.pkl must come from a trusted source.
# The file is opened in a `with` block so the handle is closed after loading.
with open(os.path.join(cur_dir, 'pkl_objects', 'classifier.pkl'), 'rb') as model_file:
    clf = pickle.load(model_file)

# Path of the SQLite database that stores reviews and their labels.
db = os.path.join(cur_dir, 'reviews.sqlite')
def classify(document):
    """Predict the sentiment of a single review text.

    Returns a ``(label, probability)`` tuple: ``label`` is ``'negative'`` or
    ``'positive'`` and ``probability`` is the largest value returned by
    ``clf.predict_proba`` for the document.
    """
    class_names = {0: 'negative', 1: 'positive'}
    # The vectorizer expects a sequence of documents, hence the one-item list.
    features = vect.transform([document])
    predicted = clf.predict(features)[0]
    confidence = np.max(clf.predict_proba(features))
    return class_names[predicted], confidence
def train(document, y):
    """Feed one labelled review to ``clf.partial_fit``.

    ``document`` is the review text and ``y`` its class label; both are
    wrapped in one-item lists to form a single-sample batch.
    """
    features = vect.transform([document])
    clf.partial_fit(features, [y])
def sqlite_entry(path, document, y):
    """Insert one review and its sentiment label into ``review_db``.

    Parameters:
        path: Path of the SQLite database file.
        document: Review text stored in the ``review`` column.
        y: Class label stored in the ``sentiment`` column.

    The ``date`` column is filled in by SQLite with ``DATETIME('now')``.

    Raises:
        sqlite3.Error: If the insert fails (e.g. the table does not exist).
    """
    conn = sqlite3.connect(path)
    try:
        # `with conn` commits on success and rolls back if the insert raises.
        with conn:
            conn.execute(
                "INSERT INTO review_db (review, sentiment, date) "
                "VALUES (?, ?, DATETIME('now'))",
                (document, y),
            )
    finally:
        # The connection context manager does not close the connection, so
        # close it explicitly even when the insert failed.
        conn.close()
# FLASK
class ReviewForm(Form):
    """Form holding the single text area used to submit a movie review."""

    # Unlabelled text area; input is required and must be at least
    # 15 characters long.
    moviereview = TextAreaField(
        '',
        [validators.DataRequired(), validators.length(min=15)],
    )
@app.route('/')
def index():
    """Render the review entry page with a ``ReviewForm``."""
    return render_template('reviewform.html', form=ReviewForm(request.form))
@app.route('/results', methods=['POST'])
def results():
    """Classify the submitted review and show the prediction.

    If the form does not validate, the entry form is rendered again with the
    bound form object.
    """
    form = ReviewForm(request.form)
    if request.method != 'POST' or not form.validate():
        return render_template('reviewform.html', form=form)

    review = request.form['moviereview']
    y, proba = classify(review)
    # Probability is shown as a percentage rounded to two decimals.
    percent = round(proba*100, 2)
    return render_template(
        'results.html',
        content=review,
        prediction=y,
        probability=percent,
    )
@app.route('/thanks', methods=['POST'])
def feedback():
    """Record the user's verdict on a prediction and learn from it.

    Reads ``feedback_button``, ``review`` and ``prediction`` from the posted
    form, derives the label to learn from, updates the classifier via
    ``train``, stores the review and label via ``sqlite_entry`` and renders
    the thank-you page. Responds with HTTP 400 when ``prediction`` is not a
    known label.
    """
    verdict = request.form['feedback_button']
    review = request.form['review']
    prediction = request.form['prediction']

    inv_label = {'negative': 0, 'positive': 1}
    if prediction not in inv_label:
        # The hidden field is client-controlled; reject unexpected values
        # instead of failing with a KeyError (HTTP 500).
        return 'Unknown prediction label', 400
    y = inv_label[prediction]

    # results.html submits the buttons with the values 'True' / 'False';
    # 'Incorrect' is still accepted for templates that use that wording.
    if verdict in ('Incorrect', 'False'):
        y = 1 - y  # the prediction was wrong, so learn the opposite label
    train(review, y)
    sqlite_entry(db, review, y)
    return render_template('thanks.html')
if __name__ == '__main__':
    # Start Flask's built-in server in debug mode when run as a script.
    # NOTE(review): debug mode is intended for local development only.
    app.run(debug=True)
reviewform.html
<!doctype html>
{# Review entry page: posts the text area to /results for classification. #}
<html>
  <head>
    <title>classify movie review</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
  </head>
  <body>
    <h2>please enter your review about movie:</h2>
    {# render_field is a macro imported from the form-helper template. #}
    {% from "_formhelpers.html" import render_field %}
    <form method="post" action="/results">
      <dl>
        {{ render_field(form.moviereview, cols='30', rows='10') }}
      </dl>
      <div>
        <input type="submit" value="enter review" name="submit_btn">
      </div>
    </form>
  </body>
</html>
results.html
<!doctype html>
{# Prediction page: shows the review, the predicted label and its probability,
   and posts the user's verdict to /thanks. #}
<html>
  <head>
    <title>Movie Classification</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
  </head>
  <body>
    <h3>Entered movie review:</h3>
    <div>{{ content }}</div>
    <h3>predict:</h3>
    <div>prediction about this movie is
      <strong>{{ 'positive' if prediction == 'positive' else 'negative' }}</strong>
      (probability: {{ probability }}%).</div>
    <div class="button">
      <form action="/thanks" method="post">
        {# Only the clicked submit button's value is sent as feedback_button. #}
        <input type="submit" value="True" name="feedback_button">
        <input type="submit" value="False" name="feedback_button">
        {# Hidden fields carry the prediction and review text to /thanks. #}
        <input type="hidden" value="{{ prediction }}" name="prediction">
        <input type="hidden" value="{{ content }}" name="review">
      </form>
    </div>
    <div class="button">
      <form action="/">
        <input type="submit" value="enter more review">
      </form>
    </div>
  </body>
</html>
style.css
/* Constrain the page content to a fixed-width column. */
body {
    width: 600px;
}

/* Add space above each button group. */
.button {
    padding-top: 20px;
}
thanks.html
<!doctype html>
{# Thank-you page with a button that returns to the review entry form. #}
<html>
  <head>
    <title>classify movie</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
  </head>
  <body>
    <h3>Thanks for your review!</h3>
    <div class="button">
      <form action="/">
        <input type="submit" value="enter more review">
      </form>
    </div>
  </body>
</html>
위 애플리케이션은 clf 객체에 반영된 업데이트 내용을 classifier.pkl 파일에 저장하지 않는다.
만약 업데이트된 내용을 영구적으로 유지하고 싶다면 업데이트될 때마다, clf 객체를 다시 pickle 모듈로 직렬화 해야 한다.
이는 계산 비용이 비싸기 때문에 매우 비효율적이다.
또한 동시에 여러 사용자가 피드백을 전달하면 pickle 파일이 손상될 수 있다.

대신 SQLite 데이터베이스에 수집된 피드백 데이터를 사용하여 예측 모델을 업데이트할 수 있다.
/movieclassifier/update.py
import pickle
import sqlite3
import numpy as np
import os
from vectorizer import vect
def update_model(db_path, model, batch_size=10000):
    """Incrementally train *model* on the reviews stored in SQLite.

    Parameters:
        db_path: Path of the SQLite file containing the ``review_db`` table.
        model: Estimator exposing ``partial_fit(X, y, classes=...)``.
        batch_size: Maximum number of rows fetched and fitted per step.

    Returns:
        The same ``model`` object after all batches have been applied. When
        the table is empty the model is returned untouched.

    Raises:
        sqlite3.Error: If the database cannot be queried.
    """
    # Both labels are declared on every call because a batch may contain
    # only one of them.
    classes = np.array([0, 1])

    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        # Name the columns explicitly so the code does not depend on the
        # column order of the table.
        c.execute('SELECT review, sentiment FROM review_db')

        results = c.fetchmany(batch_size)
        while results:
            reviews = [row[0] for row in results]
            y = np.array([int(row[1]) for row in results])
            X_train = vect.transform(reviews)
            model.partial_fit(X_train, y, classes=classes)
            results = c.fetchmany(batch_size)
    finally:
        # Close the connection even if fetching or fitting raised.
        conn.close()
    return model
# Directory of this script; data files are resolved relative to it.
cur_dir = os.path.dirname(__file__)

# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# classifier.pkl must come from a trusted source.
with open(os.path.join(cur_dir, 'pkl_objects', 'classifier.pkl'), 'rb') as model_file:
    clf = pickle.load(model_file)

# Same database file that app.py writes reviews and labels to.
db = os.path.join(cur_dir, 'reviews.sqlite')

clf = update_model(db_path=db, model=clf, batch_size=10000)

# Uncomment the lines below to overwrite classifier.pkl with the updated
# model permanently.
# with open(os.path.join(cur_dir, 'pkl_objects', 'classifier.pkl'), 'wb') as model_file:
#     pickle.dump(clf, model_file, protocol=4)